{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn import model_selection, preprocessing, metrics, linear_model, pipeline, ensemble\n",
    "import numpy as np\n",
    "\n",
    "import seaborn as sns\n",
    "\n",
    "import scipy\n",
    "\n",
    "# Print NumPy floats in fixed-point notation with 4 digits of precision.\n",
    "np.set_printoptions(precision=4, suppress=True)\n",
    "\n",
    "# Render matplotlib figures inline in the notebook output.\n",
    "%matplotlib inline\n",
    "\n",
    "# Plot appearance: ggplot style sheet and a 10x6 default figure size.\n",
    "plt.style.use(\"ggplot\")\n",
    "plt.rcParams[\"figure.figsize\"] = (10, 6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 2919 entries, 0 to 2918\n",
      "Data columns (total 80 columns):\n",
      "Id               2919 non-null int64\n",
      "MSSubClass       2919 non-null int64\n",
      "MSZoning         2919 non-null object\n",
      "LotFrontage      2919 non-null float64\n",
      "LotArea          2919 non-null int64\n",
      "Street           2919 non-null object\n",
      "Alley            2919 non-null object\n",
      "LotShape         2919 non-null object\n",
      "LandContour      2919 non-null object\n",
      "LotConfig        2919 non-null object\n",
      "LandSlope        2919 non-null object\n",
      "Neighborhood     2919 non-null object\n",
      "Condition1       2919 non-null object\n",
      "Condition2       2919 non-null object\n",
      "BldgType         2919 non-null object\n",
      "HouseStyle       2919 non-null object\n",
      "OverallQual      2919 non-null int64\n",
      "OverallCond      2919 non-null int64\n",
      "YearBuilt        2919 non-null int64\n",
      "YearRemodAdd     2919 non-null int64\n",
      "RoofStyle        2919 non-null object\n",
      "RoofMatl         2919 non-null object\n",
      "Exterior1st      2919 non-null object\n",
      "Exterior2nd      2919 non-null object\n",
      "MasVnrType       2919 non-null object\n",
      "MasVnrArea       2919 non-null float64\n",
      "ExterQual        2919 non-null object\n",
      "ExterCond        2919 non-null object\n",
      "Foundation       2919 non-null object\n",
      "BsmtQual         2919 non-null object\n",
      "BsmtCond         2919 non-null object\n",
      "BsmtExposure     2919 non-null object\n",
      "BsmtFinType1     2919 non-null object\n",
      "BsmtFinSF1       2919 non-null float64\n",
      "BsmtFinType2     2919 non-null object\n",
      "BsmtFinSF2       2919 non-null float64\n",
      "BsmtUnfSF        2919 non-null float64\n",
      "TotalBsmtSF      2919 non-null float64\n",
      "Heating          2919 non-null object\n",
      "HeatingQC        2919 non-null object\n",
      "CentralAir       2919 non-null object\n",
      "Electrical       2919 non-null object\n",
      "1stFlrSF         2919 non-null int64\n",
      "2ndFlrSF         2919 non-null int64\n",
      "LowQualFinSF     2919 non-null int64\n",
      "GrLivArea        2919 non-null int64\n",
      "BsmtFullBath     2919 non-null float64\n",
      "BsmtHalfBath     2919 non-null float64\n",
      "FullBath         2919 non-null int64\n",
      "HalfBath         2919 non-null int64\n",
      "BedroomAbvGr     2919 non-null int64\n",
      "KitchenAbvGr     2919 non-null int64\n",
      "KitchenQual      2919 non-null object\n",
      "TotRmsAbvGrd     2919 non-null int64\n",
      "Functional       2919 non-null object\n",
      "Fireplaces       2919 non-null int64\n",
      "FireplaceQu      2919 non-null object\n",
      "GarageType       2919 non-null object\n",
      "GarageYrBlt      2919 non-null float64\n",
      "GarageFinish     2919 non-null object\n",
      "GarageCars       2919 non-null float64\n",
      "GarageArea       2919 non-null float64\n",
      "GarageQual       2919 non-null object\n",
      "GarageCond       2919 non-null object\n",
      "PavedDrive       2919 non-null object\n",
      "WoodDeckSF       2919 non-null int64\n",
      "OpenPorchSF      2919 non-null int64\n",
      "EnclosedPorch    2919 non-null int64\n",
      "3SsnPorch        2919 non-null int64\n",
      "ScreenPorch      2919 non-null int64\n",
      "PoolArea         2919 non-null int64\n",
      "PoolQC           2919 non-null object\n",
      "Fence            2919 non-null object\n",
      "MiscFeature      2919 non-null object\n",
      "MiscVal          2919 non-null int64\n",
      "MoSold           2919 non-null int64\n",
      "YrSold           2919 non-null int64\n",
      "SaleType         2919 non-null object\n",
      "SaleCondition    2919 non-null object\n",
      "SalesPrice       1460 non-null float64\n",
      "dtypes: float64(12), int64(26), object(42)\n",
      "memory usage: 1.8+ MB\n"
     ]
    }
   ],
   "source": [
    "# Load the combined, pre-cleaned house-prices table and print its column summary.\n",
    "# NOTE(review): absolute, machine-specific path; consider a configurable data directory.\n",
    "df = pd.read_csv(\n",
    "    \"/data/kaggle/house-prices/data_combined_cleaned.csv\"\n",
    ")\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows that carry a SalesPrice go to df_train; rows where it is NaN go to df_test.\n",
    "df_train = df.loc[df.SalesPrice.notnull()]\n",
    "df_test = df.loc[df.SalesPrice.isnull()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((1460, 80), (1459, 80))"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (rows, columns) of the two partitions, displayed as a pair.\n",
    "(df_train.shape, df_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAloAAAFpCAYAAABEXYZ0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAHndJREFUeJzt3V9sVGXi//HPtFOQOrRMO6XdIiSW\nP1lxZZGFteKfIoy7Rg1xiWnEuEZdQSxqCnEVvcALJDum1nZd2mBcF1e9kQvLan7JkowoRFhDoaBI\nXRQBg0vtWDqUlj/CzDy/C76MlM7I2Olj58/7dWPncGbOMx/mlI/POXOOwxhjBAAAgCGXM9wDAAAA\nyFQULQAAAEsoWgAAAJZQtAAAACyhaAEAAFhC0QIAALCEogUAAGAJRQsAAMASihYAAIAlFC0AAABL\nKFoAAACWOId7AOedOXNGXV1dwz2MlOTxeMgmBnKJjVziI5vYyCU+sokt23MpLy9PeF1mtAAAACyh\naAEAAFhC0QIAALCEogUAAGAJRQsAAMASihYAAIAlFC0AAABLKFoAAACWULQAAAAsoWgBAABYQtEC\nAACwhKIFAABgCUULAADAEudwDwCpIbxofkLr5b76ruWRAACQOZjRAgAAsISiBQAAYAlFCwAAwBKK\nFgAAgCWXPBm+ublZbW1tKiwsVH19vSSpoaFBR44ckSSdPHlS+fn5qqurUyAQ0LJly1ReXi5Jmjx5\nshYvXmxx+AAAAKnrkkVrzpw5uu2229TU1BRdtmzZsujPb7zxhvLz86OPy8rKVFdXN8TDBAAASD+X\nPHQ4depUuVyumH9mjNF//vMf3XDDDUM+MAAAgHSX1HW0Pv/8cxUWFuoXv/hFdFkgENBTTz2lUaNG\n6Z577tFVV12V9CABAADSUVJFa+vWrf1ms9xut5qbmzV69GgdOHBAdXV1qq+v73do8Ty/3y+/3y9J\n8vl8cjqd8ng8yQwnY/0c2XQmuF4q/R3xmYmNXOIjm9jIJT6yiY1cEjfoohUOh7V9+3b5fL7osry8\nPOXl5UmSKioqVFpaqo6ODk2cOHHA871er7xeb/RxKBRSV1fXYIeT0TweT8pkkyrjkFIrl1RCLvGR\nTWzkEh/ZxJbtuZz/0l8iBn15hz179qi8vFzFxcXRZcePH1ckEpEkdXZ2qqOjQ6WlpYPdBAAAQFq7\n5IxWY2Oj2tvb1dvbqyVLlqi6ulpz584dcNhQktrb27V+/Xrl5uYqJydHixYtinsiPQAAQKa7ZNGq\nra2NuXzp0qUDllVWVqqysjL5UQEAAGQArgwPAABgCUULAADAEooWAACAJRQtAAAASyhaAAAAllC0\nAAAALKFoAQAAWELRAgAAsISiBQAAYAlFCwAAwBKKFgAAgCUULQAAAEsueVNppLfwovnDPQQAALIW\nM1oAAACWULQAAAAsoWgBAABYQtECAACwhKIFAABgCUULAADAEooWAACAJRQtAAAASyhaAAAAllC0\nAAAALKFoAQAAWELRAgAAsISiBQAAYAlFCwAAwBKKFgAAgCUULQAAAEsoWgAAAJZQtAAAACyhaAEA\nAFhC0QIAALCEogUAAGCJ81IrNDc3q62tTYWFhaqvr5ckrV+/Xu+//74KCgokSQsXLtSMGTMkSS0t\nLdq0aZNycnL04IMPavr06RaHDwAAkLouWbTmzJmj2267TU1NTf2W33HHHZo/f36/Zd988422bdum\nl156ScFgUKtWrdJf//pX5eQwcQYAALLPJRvQ1KlT5XK5Enqx1tZWzZ49W3l5eRo7dqzKysq0f//+\npAcJAACQji45oxXPxo0btWXLFlVUVOj++++Xy+VSd3e3Jk+eHF2nqKhI3d3dQzJQAACAdDOoovW7\n3/1Od999tyTp7bff1htvvKGamhoZYx
J+Db/fL7/fL0ny+XxyOp3yeDyDGU7GSyabziEeSyr9HfGZ\niY1c4iOb2MglPrKJjVwSN6iiNWbMmOjP8+bN0wsvvCBJKi4u1tGjR6N/1t3draKiopiv4fV65fV6\no49DoZC6uroGM5yM5/F4UiabVBmHlFq5pBJyiY9sYiOX+MgmtmzPpby8POF1B3WWejAYjP68fft2\njR8/XpI0c+ZMbdu2TWfPnlUgEFBHR4cmTZo0mE0AAACkvUvOaDU2Nqq9vV29vb1asmSJqqurtXfv\nXh06dEgOh0MlJSVavHixJGn8+PG6/vrrtXz5cuXk5OhPf/oT3zgEAABZ65JFq7a2dsCyuXPnxl1/\nwYIFWrBgQXKjAgAAyABMNwEAAFhC0QIAALCEogUAAGAJRQsAAMASihYAAIAlFC0AAABLKFoAAACW\nULQAAAAsoWgBAABYQtECAACwhKIFAABgCUULAADAEooWAACAJRQtAAAASyhaAAAAllC0AAAALKFo\nAQAAWELRAgAAsISiBQAAYAlFCwAAwBKKFgAAgCUULQAAAEsoWgAAAJZQtAAAACyhaAEAAFhC0QIA\nALCEogUAAGAJRQsAAMASihYAAIAlzuEeADJTeNH8hNbLffVdyyMBAGD4MKMFAABgCUULAADAEooW\nAACAJRQtAAAASy55Mnxzc7Pa2tpUWFio+vp6SdKbb76pnTt3yul0qrS0VDU1Nbr88ssVCAS0bNky\nlZeXS5ImT56sxYsX230HAAAAKeqSRWvOnDm67bbb1NTUFF02bdo03XvvvcrNzdVbb72llpYW3Xff\nfZKksrIy1dXV2RsxAABAmrhk0Zo6daoCgUC/Zb/+9a+jP0+ZMkUff/zx0I8McSV66QQAADC8kr6O\n1qZNmzR79uzo40AgoKeeekqjRo3SPffco6uuuirZTQAAAKSlpIrWO++8o9zcXN10002SJLfbrebm\nZo0ePVoHDhxQXV2d6uvrlZ+fP+C5fr9ffr9fkuTz+eR0OuXxeJIZTsa6OJvOYRxLon9HiY4xmb9z\nPjOxkUt8ZBMbucRHNrGRS+IGXbQ+/PBD7dy5UytXrpTD4ZAk5eXlKS8vT5JUUVGh0tJSdXR0aOLE\niQOe7/V65fV6o49DoZC6uroGO5yM5vF4UiaboR5HMq+XSrmkEnKJj2xiI5f4yCa2bM/l/Jf+EjGo\nyzvs3r1b//rXv/T0009r5MiR0eXHjx9XJBKRJHV2dqqjo0OlpaWD2QQAAEDau+SMVmNjo9rb29Xb\n26slS5aourpaLS0tCoVCWrVqlaQfLuPQ3t6u9evXKzc3Vzk5OVq0aJFcLpf1NwEAAJCKLlm0amtr\nByybO3duzHUrKytVWVmZ/KgAAAAyQNLfOkR24dISAAAkjlvwAAAAWMKMFoZVojNkua++a3kkAAAM\nPWa0AAAALKFoAQAAWELRAgAAsISiBQAAYAlFCwAAwBKKFgAAgCUULQAAAEsoWgAAAJZQtAAAACyh\naAEAAFhC0QIAALCEogUAAGAJRQsAAMASihYAAIAlFC0AAABLKFoAAACWULQAAAAsoWgBAABYQtEC\nAACwhKIFAABgCUULAADAEooWAACAJRQtAAAASyhaAAAAllC0AAAALKFoAQAAWELRAgAAsISiBQAA\nYAlFCwAAwBKKFgAAgCUULQAAAEuciazU3NystrY2FRYWqr6+XpLU19enhoYGfffddyopKdGyZcvk\ncrlkjNG6deu0a9cujRw5UjU1NaqoqLD6JgAAAFJRQjNac+bM0bPPPttv2YYNG3TNNdfo5Zdf1jXX\nXKMNGzZIknbt2qVvv/1WL7/8shYvXqy///3vQz9qAACANJBQ0Zo6dapcLle/Za2traqqqpIkVVVV\nqbW1VZK0Y8cO3XzzzXI4HJoyZYpOnDihYDA4xMMGAABIfYM+R6unp0dut1uS5Ha7dfz4cUlSd3e3\nPB
5PdL3i4mJ1d3cnOUwAAID0k9A5Wj+FMWbAMofDMWCZ3++X3++XJPl8Pjmdzn4FDT+4OJvOYRzL\ncIn12eAzExu5xEc2sZFLfGQTG7kkbtBFq7CwUMFgUG63W8FgUAUFBZLOzWB1dXVF1zt69Gh05utC\nXq9XXq83+jgUCvV7Hn7g8XiyPptY759cYiOX+MgmNnKJj2xiy/ZcysvLE1530IcOZ86cqc2bN0uS\nNm/erFmzZkWXb9myRcYYffHFF8rPz49ZtAAAADJdQjNajY2Nam9vV29vr5YsWaLq6mrdddddamho\n0KZNm+TxeLR8+XJJ0rXXXqu2tjY98cQTGjFihGpqaqy+AQAAgFSVUNGqra2NuXzlypUDljkcDj38\n8MPJjQoAACADcGV4AAAASyhaAAAAllC0AAAALKFoAQAAWELRAgAAsISiBQAAYAlFCwAAwBKKFgAA\ngCUULQAAAEsoWgAAAJZQtAAAACyhaAEAAFhC0QIAALCEogUAAGAJRQsAAMASihYAAIAlFC0AAABL\nKFoAAACWULQAAAAsoWgBAABYQtECAACwhKIFAABgCUULAADAEooWAACAJRQtAAAASyhaAAAAllC0\nAAAALKFoAQAAWELRAgAAsISiBQAAYAlFCwAAwBKKFgAAgCUULQAAAEucwz0AIBHhRfMHLOuMsV7u\nq+/aHwwAAAkadNE6cuSIGhoaoo8DgYCqq6t14sQJvf/++yooKJAkLVy4UDNmzEh+pAAAAGlm0EWr\nvLxcdXV1kqRIJKJHHnlEv/3tb/XBBx/ojjvu0Pz5A2cgAAAAssmQnKO1Z88elZWVqaSkZCheDgAA\nICMMyTlaW7du1Q033BB9vHHjRm3ZskUVFRW6//775XK5hmIzAAAAacVhjDHJvEAoFNIjjzyi+vp6\njRkzRseOHYuen/X2228rGAyqpqZmwPP8fr/8fr8kyefzKRKJKBQKJTOUjOV0Ovtl0/mH2cM4mtRW\n2rJtuIcw7C7+vOAHZBMbucRHNrFley4jRoxIeN2kZ7R27dqlK6+8UmPGjJGk6H8lad68eXrhhRdi\nPs/r9crr9UYfh0IhdXV1JTucjOTxeMgmQeTE5+XHkE1s5BIf2cSW7bmUl5cnvG7S52hdfNgwGAxG\nf96+fbvGjx+f7CYAAADSUlIzWt9//70+/fRTLV68OLrsrbfe0qFDh+RwOFRSUtLvzwAAALJJUkVr\n5MiR+sc//tFv2eOPP57UgAAAADIFt+ABAACwhKIFAABgCUULAADAEooWAACAJRQtAAAASyhaAAAA\nllC0AAAALKFoAQAAWELRAgAAsISiBQAAYAlFCwAAwBKKFgAAgCUULQAAAEsoWgAAAJY4h3sAwHAJ\nL5qf0Hq5r75reSQAgEzFjBYAAIAlFC0AAABLKFoAAACWULQAAAAsoWgBAABYwrcOfwZ8u+3nk2jW\nAAD8HJjRAgAAsISiBQAAYAlFCwAAwBKKFgAAgCWcDJ9C4p3I3fkzjwMAAAwNZrQAAAAsoWgBAABY\nQtECAACwhKIFAABgCUULAADAEooWAACAJRQtAAAASyhaAAAAllC0AAAALEn6yvBLly7VZZddppyc\nHOXm5srn86mvr08NDQ367rvvVFJSomXLlsnlcg3FeAEAANLGkNyC57nnnlNBQUH08YYNG3TNNdfo\nrrvu0oYNG7Rhwwbdd999Q7EpAACAtGHl0GFra6uqqqokSVVVVWptbbWxGQAAgJQ2JDNaq1evliTd\neuut8nq96unpkdvtliS53W4dP358KDYDAACQVpIuWqtWrVJRUZF6enr0/PPPq7y8PKHn+f1++f1+\nSZLP55PT6ZTH40l2OCmpc7gHgKSk2+cyk/elZJFNbOQSH9nERi6JS7poFRUVSZIKCws1a9Ys7d+/\nX4WFhQoGg3K73QoGg/3O3zrP6/XK6/VGH4dCIXV1dSU7HGDIdf5h
dkLr5b76ruWRJMbj8bAvxUE2\nsZFLfGQTW7bnkuikkpTkOVqnT5/WqVOnoj9/+umnmjBhgmbOnKnNmzdLkjZv3qxZs2YlsxkAAIC0\nlNSMVk9Pj1588UVJUjgc1o033qjp06dr4sSJamho0KZNm+TxeLR8+fIhGSwAAEA6SapolZaWqq6u\nbsDy0aNHa+XKlcm8NAAAQNrjyvAAAACWULQAAAAsoWgBAABYQtECAACwhKIFAABgCUULAADAEooW\nAACAJRQtAAAASyhaAAAAllC0AAAALKFoAQAAWELRAgAAsISiBQAAYAlFCwAAwBKKFgAAgCUULQAA\nAEsoWgAAAJZQtAAAACyhaAEAAFjiHO4BAJkivGj+kL5e7qvvDunrAQB+fsxoAQAAWELRAgAAsISi\nBQAAYAlFCwAAwBKKFgAAgCUULQAAAEsoWgAAAJZQtAAAACyhaAEAAFhC0QIAALCEogUAAGAJRQsA\nAMASihYAAIAlFC0AAABLnIN9YldXl5qamnTs2DE5HA55vV7dfvvtWr9+vd5//30VFBRIkhYuXKgZ\nM2YM2YABAADSxaCLVm5urv74xz+qoqJCp06d0ooVKzRt2jRJ0h133KH58+cP2SABAADS0aCLltvt\nltvtliSNGjVK48aNU3d395ANDAAAIN0NyTlagUBABw8e1KRJkyRJGzdu1JNPPqnm5mb19fUNxSYA\nAADSjsMYY5J5gdOnT+u5557TggULdN111+nYsWPR87PefvttBYNB1dTUDHie3++X3++XJPl8PkUi\nEYVCoWSGkrI6/zB7uIeADFbasq3fY6fTGXNf+imfw4tfM1PEyybbkUt8ZBNbtucyYsSIhNcd9KFD\nSQqFQqqvr9dNN92k6667TpI0ZsyY6J/PmzdPL7zwQszner1eeb3efq/V1dWVzHCArHTxfuPxeJLe\nlzJ1XxyKbDIRucRHNrFley7l5eUJrzvoQ4fGGK1du1bjxo3TnXfeGV0eDAajP2/fvl3jx48f7CYA\nAADS2qBntPbt26ctW7ZowoQJ+vOf/yzp3KUctm7dqkOHDsnhcKikpESLFy8essECAACkk0EXrV/+\n8pdav379gOVcMwsAAOAcrgwPAABgCUULAADAEooWAACAJRQtAAAASyhaAAAAllC0AAAALKFoAQAA\nWELRAgAAsISiBQAAYAlFCwAAwJJB34IHQOYKL5qf0Hq5r75reSQAkN6Y0QIAALCEogUAAGAJRQsA\nAMASihYAAIAlnAyfhERPGAZsuvhz2DlM4wAADMSMFgAAgCUULQAAAEsoWgAAAJZwjlYMnHsFJGao\nL2zKhVIBZBpmtAAAACyhaAEAAFjCoUMAaYdDjADSBTNaAAAAllC0AAAALKFoAQAAWELRAgAAsISi\nBQAAYAlFCwAAwJKsurwDV3wHssvF+3xnnPW4DAQAW5jRAgAAsCSrZrQADI9Un00ezvExmwZkNma0\nAAAALKFoAQAAWGLt0OHu3bu1bt06RSIRzZs3T3fddZetTQFA2uK+jfGlQzbpMMZMka5ZW5nRikQi\neu211/Tss8+qoaFBW7du1TfffGNjUwAAACnLyozW/v37VVZWptLSUknS7Nmz1draqiuuuMLG5gAg\n49k8YT/eZS+knzY7kOozDqk+PmQmKzNa3d3dKi4ujj4uLi5Wd3e3jU0BAACkLCszWsaYAcscDke/\nx36/X36/X5Lk8/k0YsQIlZeX2xjOD/7fDruvDwDZbKh/x6bI6/3ov01Z/O+K9X+zL5amWVuZ0Sou\nLtbRo0ejj48ePSq3291vHa/XK5/PJ5/PJ0lasWKFjaFkBLKJjVxiI5f4yCY2comPbGIjl8RZKVoT\nJ05UR0eHAoGAQqGQtm3bppkzZ9rYFAAAQMqycugwNzdXDz30kFavXq1IJKJbbrlF48ePt7EpAACA\nlGXtOlozZszQjBkzEl7f6/Xa
GkraI5vYyCU2comPbGIjl/jIJjZySZzDxDpzHQAAAEnjFjwAAACW\nWDt0+FNk6u16mpub1dbWpsLCQtXX10uS+vr61NDQoO+++04lJSVatmyZXC6XjDFat26ddu3apZEj\nR6qmpkYVFRWSpA8//FDvvPOOJGnBggWaM2eOJOnAgQNqamrSmTNndO211+rBBx+Uw+GIu41U0dXV\npaamJh07dkwOh0Ner1e333571mdz5swZPffccwqFQgqHw6qsrFR1dbUCgYAaGxvV19enK6+8Uo8/\n/ricTqfOnj2rNWvW6MCBAxo9erRqa2s1duxYSVJLS4s2bdqknJwcPfjgg5o+fbqk+PtavG2kmkgk\nohUrVqioqEgrVqwgG0lLly7VZZddppycHOXm5srn82X9vnTeiRMntHbtWh0+fFgOh0OPPvqoysvL\nszqbI0eOqKGhIfo4EAiourpaVVVVWZ2LVWaYhcNh89hjj5lvv/3WnD171jz55JPm8OHDwz2sIbF3\n717z1VdfmeXLl0eXvfnmm6alpcUYY0xLS4t58803jTHG7Ny506xevdpEIhGzb98+88wzzxhjjOnt\n7TVLly41vb29/X42xpgVK1aYffv2mUgkYlavXm3a2tp+dBuporu723z11VfGGGNOnjxpnnjiCXP4\n8OGszyYSiZhTp04ZY4w5e/aseeaZZ8y+fftMfX29+eijj4wxxrzyyitm48aNxhhj/v3vf5tXXnnF\nGGPMRx99ZF566SVjjDGHDx82Tz75pDlz5ozp7Ow0jz32mAmHwz+6r8XbRqp57733TGNjo/nLX/5i\njIk/7mzKpqamxvT09PRblu370nl/+9vfjN/vN8ac26f6+vrI5gLhcNg8/PDDJhAIkItFw37o8MLb\n9TidzujtejLB1KlTB7T11tZWVVVVSZKqqqqi73XHjh26+eab5XA4NGXKFJ04cULBYFC7d+/WtGnT\n5HK55HK5NG3aNO3evVvBYFCnTp3SlClT5HA4dPPNN0dfK942UoXb7Y7+H9GoUaM0btw4dXd3Z302\nDodDl112mSQpHA4rHA7L4XBo7969qqyslCTNmTOnXy7n/w+ysrJSn332mYwxam1t1ezZs5WXl6ex\nY8eqrKxM+/fvj7uvGWPibiOVHD16VG1tbZo3b54k/ei4sy2bi2X7viRJJ0+e1Oeff665c+dKkpxO\npy6//HKyucCePXtUVlamkpIScrFo2Oe/Y92u58svvxzGEdnV09MTvXir2+3W8ePHJZ3LwePxRNc7\nf9uii/MpKiqKufzC2xzF20YqCgQCOnjwoCZNmkQ2Ondo7Omnn9a3336r3//+9yotLVV+fr5yc3Ml\n/fAepf77Tm5urvLz89Xb26vu7m5Nnjw5+poXPifWvtbb2xt3G6nk9ddf13333adTp05J0o+OO9uy\nWb16tSTp1ltvldfrZV/Sud8tBQUFam5u1tdff62Kigo98MADZHOBrVu36oYbbpDEv002DXvRMgnc\nricb/JQcHA5HzPXTzenTp1VfX68HHnhA+fn5cdfLpmxycnJUV1enEydO6MUXX9T//ve/uOvGyyXe\n+0/nfW3nzp0qLCxURUWF9u7de8n1symbVatWqaioSD09PXr++ed/9LYo2bQvhcNhHTx4UA899JAm\nT56sdevWacOGDXHXz6ZsJCkUCmnnzp269957f3S9bMvFhmE/dJjI7XoySWFhoYLBoCQpGAyqoKBA\n0rkcurq6ouudz6GoqKhfPt3d3XK73TFzKyoq+tFtpJJQKKT6+nrddNNNuu666ySRzYUuv/xyTZ06\nVV9++aVOnjypcDgs6dx7PP9eLnyf4XBYJ0+elMvlGvD+zz8n3r42evTouNtIFfv27dOOHTu0dOlS\nNTY26rPPPtPrr79ONlK/z/asWbO0f/9+9iWde6/FxcXRGczKykodPHiQbP7Prl27dOWVV2rMmD
GS\n+P1r07AXrWy7Xc/MmTO1efNmSdLmzZs1a9as6PItW7bIGKMvvvhC+fn5crvdmj59uj755BP19fWp\nr69Pn3zyiaZPny63261Ro0bpiy++kDFGW7ZsieYWbxupwhijtWvXaty4cbrzzjujy7M9m+PHj+vE\niROSzn0Dcc+ePRo3bpyuvvpqffzxx5LOfcvn/Hv5zW9+ow8//FCS9PHHH+vqq6+Ww+HQzJkztW3b\nNp09e1aBQEAdHR2aNGlS3H3N4XDE3UaquPfee7V27Vo1NTWptrZWv/rVr/TEE09kfTanT5+OHko9\nffq0Pv30U02YMCHr9yVJGjNmjIqLi3XkyBFJ585HuuKKK8jm/1x42FDi969NKXHB0ra2Nv3zn/+M\n3q5nwYIFwz2kIdHY2Kj29nb19vaqsLBQ1dXVmjVrlhoaGtTV1SWPx6Ply5dHv0L72muv6ZNPPtGI\nESNUU1OjiRMnSpI2bdqklpYWSee+QnvLLbdIkr766is1NzfrzJkzmj59uh566CE5HA719vbG3Eaq\n+O9//6uVK1dqwoQJ0SnohQsXavLkyVmdzddff62mpiZFIhEZY3T99dfr7rvvVmdn54DLC+Tl5enM\nmTNas2aNDh48KJfLpdraWpWWlkqS3nnnHX3wwQfKycnRAw88oGuvvVZS/H0t3jZS0d69e/Xee+9p\nxYoVWZ9NZ2enXnzxRUnnZu5uvPFGLViwIO7nPFv2pfMOHTqktWvXKhQKaezYsaqpqZExJuuz+f77\n7/Xoo49qzZo10dM2+MzYkxJFCwAAIBMN+6FDAACATEXRAgAAsISiBQAAYAlFCwAAwBKKFgAAgCUU\nLQAAAEsoWgAAAJZQtAAAACz5/6IlVtNODA6UAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a13532860>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Distribution of the raw target on the training rows (50 equal-width bins).\n",
    "# Title and axis labels are set so the figure can be read on its own.\n",
    "plt.hist(df_train.SalesPrice, bins=50)\n",
    "plt.title(\"Distribution of SalesPrice (training rows)\")\n",
    "plt.xlabel(\"SalesPrice\")\n",
    "plt.ylabel(\"Number of rows\");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlkAAAFpCAYAAACvaj13AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAGbZJREFUeJzt3W2MnWWdP/Dv9MzK2k46zsyB4lAI\nFEo2JTxIpooottbZN7Ky/fOiBsVE2YVgFSJVQ30IJrrqvLC0aWKjiywaTNa4CVPxMXEyaXmBmmlL\nlZRNV4xuWMWW6YxtBxDo9PxfGEZrB3uYOVfPnOnn82rOmfue63d+c88531z3NffdVqvVagEAoKEW\nNLsAAID5SMgCAChAyAIAKEDIAgAoQMgCAChAyAIAKEDIAgAoQMgCAChAyAIAKEDIAgAoQMgCACig\nvdkFvOx3v/td08auVqsZHR1t2vjziV42hj42jl42hj42jl42RjP72NvbW9d2ZrIAAAoQsgAAChCy\nAAAKELIAAAoQsgAAChCyAAAKELIAAAoQsgAAChCyAAAKELIAAAoQsgAAChCyAAAKELIAAApob3YB\nwPw3eesNdW1Xue/hwpUAnD6nDFnbtm3Lnj170tnZmU2bNiVJHnzwwezevTvt7e1ZsmRJ1q9fn0WL\nFiVJBgcHMzw8nAULFuQDH/hArrrqqrKvAABgDjrl6cLVq1fnk5/85AnPXXHFFdm0aVO+9KUv5fWv\nf30GBweTJP/3f/+XRx99NPfee28+9alP5f7778/x48fLVA4AMIedMmStWLEiHR0dJzx35ZVXplKp\nJEkuvfTSjI2NJUlGRkZy7bXX5u/+7u9yzjnn5Nxzz82TTz5ZoGwAgLlt1gvfh4eHp04Jjo2Npaen\nZ+p73d3dUwEMAOBMMquF7w899FAqlUquu+66JEmtVqt736GhoQwNDSVJBgYGUq1WZ1PKrLS3tzd1\n/PlELxtjvvXxQJ3blXjN862XzaKPjaOXjdEKfZxxyNqxY0d2796de+65J21tbUmSnp6eHDp0aGqb\nsbGxdHd3T7t/f39/+vv7px6Pjo7OtJRZq1arTR1/PtHLxjhT+1jiNZ+pvWw0fWwcvWyMZvaxt7e3\nru1mdLpw7969+c53vpO77747Z5111tTzfX19efTRR/PSSy/l4MGDefrpp3PJJZfMZAgAgJZ2ypms\nLVu25IknnsjRo0dz++23Z926dRkcHMyxY8fyuc99LkmyfPny3HbbbTn//PPz5je/ORs2bMiCBQvy\nL//yL1mwwPVOAYAzzylD1kc+8pGTnluzZs0rbn/jjTfmxhtvnF1VAAAtzjQTAEABQhYAQAFCFgBA\nAUIWAEABQhYAQAFCFgBAAUIWAEABQhYAQAFCFgBAAUIWAEABQhYAQAFCFgBAAUIWAEABQhYAQAFC\nFgBAAUIWAEABQhYAQAFCFgBAAUIWAEABQhYAQAFCFgBAAUIWAEABQhYAQAFCFgBAAUIWAEABQhYA\nQAHtzS4AoNkmb72hru0q9z1cuBJgPjGTBQBQgJAFAFCAkAUAUICQBQBQgJAFAFCAkAUAUICQBQBQ\ngJAFAFCAkAUAUICQBQBQgJAFAFCAkAUAUICQBQBQgJAFAFCAkAUAUED7qTbYtm1b9uzZk87Ozmza\ntClJMjExkc2bN+eZZ57J2WefnbvuuisdHR2p1Wp54IEH8thjj+Wss87K+vXrs2zZsuIvAgBgrjnl\nTNbq1avzyU9+8oTntm/fnssvvzxbt27N5Zdfnu3btydJHnvssfz+97/P1q1bc9ttt+VrX/tamaoB\nAOa4U4asFStWpKOj44TnRkZGsmrVqiTJqlWrMjIykiTZtWtX3va2t6WtrS2XXnppnn322YyPjxco\nGwBgbpvRmqzDhw+nq6srSdLV1ZUjR4
4kScbGxlKtVqe26+npydjYWAPKBABoLadck/Vq1Gq1k55r\na2ubdtuhoaEMDQ0lSQYGBk4IZ6dbe3t7U8efT/SyMVqljwf+37UN/XklXnM9vTxQ589qhd9JKa1y\nTLYCvWyMVujjjEJWZ2dnxsfH09XVlfHx8SxevDjJn2auRkdHp7Y7dOjQ1IzXX+vv709/f//U47/c\n73SrVqtNHX8+0cvGOFP7WOI1N7KXZ+Lv5GVn6jFZgl42RjP72NvbW9d2Mzpd2NfXl507dyZJdu7c\nmZUrV049/8gjj6RWq+V//ud/snDhwlcMWQAA89kpZ7K2bNmSJ554IkePHs3tt9+edevWZe3atdm8\neXOGh4dTrVazYcOGJMkb3vCG7NmzJ3feeWde85rXZP369cVfAADAXHTKkPWRj3xk2ufvueeek55r\na2vLv/7rv86+KgCAFueK7wAABQhZAAAFCFkAAAUIWQAABQhZAAAFCFkAAAUIWQAABQhZAAAFCFkA\nAAUIWQAABQhZAAAFCFkAAAUIWQAABbQ3uwCAl03eekNd21Xue7hwJQCzZyYLAKAAIQsAoAAhCwCg\nACELAKAAC98B6lTvwvzE4nzATBYAQBFCFgBAAU4XAi3H9bSAVmAmCwCgACELAKAAIQsAoAAhCwCg\nACELAKAAIQsAoAAhCwCgACELAKAAIQsAoAAhCwCgACELAKAAIQsAoAAhCwCgACELAKAAIQsAoAAh\nCwCgACELAKAAIQsAoAAhCwCggPZmFwBQyuStN+RAs4sAzlizClnf+973Mjw8nLa2tpx//vlZv359\n/vCHP2TLli2ZmJjIRRddlDvuuCPt7bIcAHBmmfHpwrGxsfzwhz/MwMBANm3alOPHj+fRRx/NN7/5\nzVx//fXZunVrFi1alOHh4UbWCwDQEma1Juv48eN58cUXMzk5mRdffDGve93rsm/fvlxzzTVJktWr\nV2dkZKQhhQIAtJIZn8fr7u7Ou971rnzwgx/Ma17zmlx55ZVZtmxZFi5cmEqlMrXN2NhYw4oFAGgV\nMw5ZExMTGRkZyZe//OUsXLgw9957b/bu3Vv3/kNDQxkaGkqSDAwMpFqtzrSUWWtvb2/q+POJXjZG\nq/TRovJX1gq/v1ejVY7JVqCXjdEKfZxxyHr88cdzzjnnZPHixUmSN73pTdm/f3+ee+65TE5OplKp\nZGxsLN3d3dPu39/fn/7+/qnHo6OjMy1l1qrValPHn0/0sjH0sfXNt9+fY7Jx9LIxmtnH3t7eurab\n8ZqsarWaX/7yl3nhhRdSq9Xy+OOPZ+nSpbnsssvy05/+NEmyY8eO9PX1zXQIAICWNeOZrOXLl+ea\na67J3XffnUqlkgsvvDD9/f25+uqrs2XLlnzrW9/KRRddlDVr1jSyXgCAljCrC1itW7cu69atO+G5\nJUuW5Itf/OKsigIAaHVuqwMAUICQBQBQgJAFAFCAkAUAUICQBQBQgJAFAFCAkAUAUICQBQBQgJAF\nAFCAkAUAUICQBQBQgJAFAFCAkAUAUICQBQBQgJAFAFCAkAUAUICQBQBQgJAFAFCAkAUAUICQBQBQ\ngJAFAFCAkAUAUICQBQBQgJAFAFCAkAUAUICQBQBQgJAFAFCAkAUAUICQBQBQgJAFAFCAkAUAUICQ\nBQBQgJAFAFBAe7MLAOaWyVtvaHYJAPOCmSwAgAKELACAApwuBCig3tOulfseLlwJ0CxmsgAAChCy\nAAAKELIAAAoQsgAAChCyAAAKmNV/Fz777LP5yle+kqeeeiptbW354Ac/mN7e3mzevDnPPPNMzj77\n7Nx1113p6OhoVL0AAC1hViHrgQceyFVXXZWPfvSjOXbsWF544YUMDg7m8ssvz9q1a7N9+/Zs3749\nN998c6PqBQBoCTM+Xfjcc8/lv//7v7NmzZokSXt7exYtWpSRkZGsWrUqSbJq1aqMjIw0plIAgBYy\n45
msgwcPZvHixdm2bVv+93//N8uWLcv73//+HD58OF1dXUmSrq6uHDlypGHFAgC0ihmHrMnJyfz6\n17/OLbfckuXLl+eBBx7I9u3b695/aGgoQ0NDSZKBgYFUq9WZljJr7e3tTR1/PtHLxmhmHw80ZdQz\nV6v8vfjbbhy9bIxW6OOMQ1ZPT096enqyfPnyJMk111yT7du3p7OzM+Pj4+nq6sr4+HgWL1487f79\n/f3p7++fejw6OjrTUmatWq02dfz5RC8bQx/PHK3ye3ZMNo5eNkYz+9jb21vXdjNek/W6170uPT09\n+d3vfpckefzxx7N06dL09fVl586dSZKdO3dm5cqVMx0CAKBlzeq/C2+55ZZs3bo1x44dyznnnJP1\n69enVqtl8+bNGR4eTrVazYYNGxpVKwBAy5hVyLrwwgszMDBw0vP33HPPbH4sAEDLm1XIAppv8tYb\n6tquct/DhSuhJL9naD1uqwMAUICQBQBQgJAFAFCAkAUAUICF73CGqHfhNACNYSYLAKAAIQsAoACn\nCwGayGlcmL/MZAEAFCBkAQAUIGQBABQgZAEAFCBkAQAUIGQBABQgZAEAFCBkAQAUIGQBABTgiu8A\n80i9V5Cv3Pdw4UoAM1kAAAUIWQAABQhZAAAFCFkAAAUIWQAABQhZAAAFCFkAAAUIWQAABQhZAAAF\nCFkAAAUIWQAABQhZAAAFuEE0wBnIjaShPDNZAAAFCFkAAAUIWQAABQhZAAAFCFkAAAUIWQAABQhZ\nAAAFCFkAAAUIWQAABbjiOwCv6OUrwx+oY1tXh4cTzTpkHT9+PBs3bkx3d3c2btyYgwcPZsuWLZmY\nmMhFF12UO+64I+3tshwAcGaZ9enCH/zgBznvvPOmHn/zm9/M9ddfn61bt2bRokUZHh6e7RAAAC1n\nViHr0KFD2bNnT97xjnckSWq1Wvbt25drrrkmSbJ69eqMjIzMvkoAgBYzq5D19a9/PTfffHPa2tqS\nJEePHs3ChQtTqVSSJN3d3RkbG5t9lQAALWbGi6V2796dzs7OLFu2LPv27XvV+w8NDWVoaChJMjAw\nkGq1OtNSZq29vb2p488netkYr6aP9SxIhtPB3359vE82Riv0ccYha//+/dm1a1cee+yxvPjii3n+\n+efz9a9/Pc8991wmJydTqVQyNjaW7u7uaffv7+9Pf3//1OPR0dGZljJr1Wq1qePPJ3rZGPpIK3LM\n1sffd2M0s4+9vb11bTfjkPWe97wn73nPe5Ik+/bty3e/+93ceeeduffee/PTn/40b3nLW7Jjx470\n9fXNdAgAgJbV8IuRvve97833vve93HHHHZmYmMiaNWsaPQQAwJzXkAtYXXbZZbnsssuSJEuWLMkX\nv/jFRvxYAICW5bY6AAAFCFkAAAUIWQAABQhZAAAFCFkAAAUIWQAABQhZAAAFCFkAAAUIWQAABQhZ\nAAAFCFkAAAUIWQAABQhZAAAFCFkAAAUIWQAABQhZAAAFCFkAAAUIWQAABQhZAAAFCFkAAAUIWQAA\nBQhZAAAFCFkAAAW0N7sAYHqTt97Q7BKgiHqP7cp9DxeuBMoykwUAUICQBQBQgJAFAFCAkAUAUICQ\nBQBQgJAFAFCAkAUAUICQBQBQgJAFAFCAkAUAUICQBQBQgJAFAFCAkAUAUICQBQBQQHuzC4D5YvLW\nG+rarnLfw4Urgeao928AzhRmsgAAChCyAAAKcLoQTsEpEGgOp+BpdTMOWaOjo/nyl7+cP/zhD2lr\na0t/f3/e+c53ZmJiIps3b84zzzyTs88+O3fddVc6OjoaWTMAwJw345BVqVTyvve9L8uWLcvzzz+f\njRs35oorrsiOHTty+eWXZ+3atdm+fXu2b9+em2++uZE1w6yZnQKgtBmvyerq6sqyZcuSJK997Wtz\n3nnnZWxsLCMjI1m1alWSZNWqVRkZGWlMpQAALaQhC98PHjyYX//6
17nkkkty+PDhdHV1JflTEDty\n5EgjhgAAaCmzXvj+xz/+MZs2bcr73//+LFy4sO79hoaGMjQ0lCQZGBhItVqdbSkz1t7e3tTx55NW\n6eWBJo5dT3/a2/1PCtSrFd5z/lKrvE/Oda3Qx1m9kx87diybNm3Kddddlze96U1Jks7OzoyPj6er\nqyvj4+NZvHjxtPv29/env79/6vHo6OhsSpmVarXa1PHnE708tXr6M9ffOGAuabX3HO+TjdHMPvb2\n9ta13YxPF9ZqtXzlK1/Jeeedl3/6p3+aer6vry87d+5MkuzcuTMrV66c6RAAAC1rxjNZ+/fvzyOP\nPJILLrggH//4x5MkN910U9auXZvNmzdneHg41Wo1GzZsaFixAACtYsYh6x/+4R/y7W9/e9rv3XPP\nPTMuCABgPrC6Fk6zeq7R1cyF+QA0hnsXAgAUIGQBABQgZAEAFCBkAQAUIGQBABQgZAEAFCBkAQAU\nIGQBABQgZAEAFCBkAQAUIGQBABQgZAEAFOAG0QC0tHpuul5C5b6HmzIurcNMFgBAAUIWAEABQhYA\nQAFCFgBAAUIWAEABQhYAQAFCFgBAAUIWAEABQhYAQAFCFgBAAUIWAEABQhYAQAFCFgBAAUIWAEAB\n7c0uAABa0eStN9S9beW+hwtWwlxlJgsAoAAhCwCgACELAKAAIQsAoAAL32mqeheO1rto9NUsRAU4\nXf7yvenA39jOAvn5xUwWAEABQhYAQAFOF/KqnOp03MvT4Ka8AV69Ri+hoLnMZAEAFGAmiySNXzBu\nAToAZzozWQAABQhZAAAFnFGnC1/pFNbfumbJ3zLXFx46ZQdwZmvW50CJz8e/fi2v9Nk9lz6bi4Ws\nvXv35oEHHsjx48fzjne8I2vXri01FADAnFMkZB0/fjz3339/Pv3pT6enpyef+MQn0tfXl6VLl5YY\nrmn8q+3pY1YO4M+8J7aGImuynnzyyZx77rlZsmRJ2tvbc+2112ZkZKTEUAAAc1KRkDU2Npaenp6p\nxz09PRkbGysxFADAnFTkdGGtVjvpuba2thMeDw0NZWhoKEkyMDCQ3t7eEqWc6Pu7yo8xl5xprxeA\n+asFP9OKzGT19PTk0KFDU48PHTqUrq6uE7bp7+/PwMBABgYGSpTwqmzcuLHZJcwbetkY+tg4etkY\n+tg4etkYrdDHIiHr4osvztNPP52DBw/m2LFjefTRR9PX11diKACAOanI6cJKpZJbbrkln//853P8\n+PG8/e1vz/nnn19iKACAOanYdbKuvvrqXH311aV+fEP19/c3u4R5Qy8bQx8bRy8bQx8bRy8boxX6\n2FabbpU6AACz4t6FAAAFzOt7F27bti179uxJZ2dnNm3alCSZmJjI5s2b88wzz+Tss8/OXXfdlY6O\njpP2ffe7350LLrggSVKtVnP33Xef1trnmul6+ZOf/CT/9V//ld/+9rf5whe+kIsvvnjafd1i6c9m\n08cPfehD+fu///ssWLAglUplTvxnbjNN18sHH3wwu3fvTnt7e5YsWZL169dn0aJFJ+3rmPyz2fTR\nMXmi6Xr5rW99K7t27UpbW1s6Ozuzfv36dHd3n7Tvjh078tBDDyVJbrzxxqxevfp0lj6nzKaPc+6z\nuzaP7du3r/arX/2qtmHDhqnnHnzwwdrg4GCtVqvVBgcHaw8++OC0+958882npcZWMV0vn3rqqdpv\nf/vb2mc+85nak08+Oe1+k5OTtQ9/+MO13//+97WXXnqp9rGPfaz21FNPna6y55yZ9rFWq9XWr19f\nO3z48OkosyVM18u9e/fWjh07VqvV/vS3Pt3ft2PyRDPtY63mmPxr0/Xy2Wefnfr6+9//fu2rX/3q\nSfsdPXq09qEPfah29OjRE74+U820j7Xa3PvsntenC1esWHHSLNXIyEhWrVqVJFm1apXb/dRpul4u\nXbr0lBeRdYulE820j5xsul5e
eeWVqVQqSZJLL7102jtNOCZPNNM+crLperlw4cKpr1944YWTLsyd\n/Glm9YorrkhHR0c6OjpyxRVXZO/evcXrnatm2se5aF6fLpzO4cOHpy6M2tXVlSNHjky73UsvvZSN\nGzemUqnkn//5n/PGN77xdJY5b0x3i6Vf/vKXTayotX3+859PkvzjP/5jS/xnTTMNDw/n2muvPel5\nx+Sr80p9fJlj8tT+8z//M4888kgWLlyYz3zmMyd9/6+Pye7ubsF2GqfqYzL3PrvPuJBVr23btqW7\nuzsHDhzIZz/72VxwwQU599xzm11Wy6nVcYsl6vO5z30u3d3dOXz4cP7t3/4tvb29WbFiRbPLmpMe\neuihVCqVXHfddSd9zzFZv7/Vx8QxWa+bbropN910UwYHB/OjH/0o69atO+U+jsmT1dPHufbZPa9P\nF06ns7Mz4+PjSZLx8fEsXrx42u1eXlC3ZMmSrFixIr/5zW9OV4nzSj23WKI+Lx+TnZ2dWblyZZ58\n8skmVzQ37dixI7t3786dd9457QeVY7I+p+pj4ph8td761rfmZz/72UnPd3d3n3BMjo2NOSb/hlfq\nYzL3PrvPuJDV19eXnTt3Jkl27tyZlStXnrTNxMREXnrppSTJkSNHsn///ixduvS01jlfuMVSY/zx\nj3/M888/P/X1L37xi6n/oOHP9u7dm+985zu5++67c9ZZZ027jWPy1Orpo2OyPk8//fTU17t27Zp2\n/eVVV12Vn//855mYmMjExER+/vOf56qrrjqdZc559fRxLn52z+uLkW7ZsiVPPPFEjh49ms7Ozqxb\nty4rV67M5s2bMzo6mmq1mg0bNqSjoyO/+tWv8uMf/zi333579u/fn3//93/PggULcvz48Vx//fVZ\ns2ZNs19OU03Xy46OjvzHf/xHjhw5kkWLFuXCCy/Mpz71qYyNjeWrX/1qPvGJTyRJ9uzZk2984xtT\nt1i68cYbm/xqmmemfTxw4EC+9KUvJUkmJyfz1re+9YzuYzJ9LwcHB3Ps2LGpRbPLly/Pbbfd5pj8\nG2baR8fkyabr5Z49e/L000+nra0t1Wo1t912W7q7u0/4zEn+tPZtcHAwyZ8u4fD2t7+9mS+lqWba\nx7n42T2vQxYAQLOccacLAQBOByELAKAAIQsAoAAhCwCgACELAKAAIQsAoAAhCwCgACELAKCA/w+4\nBB+bRTxpIgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a13e9ab00>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Same histogram after a natural-log transform of the target.\n",
    "# Title and axis labels are set so the figure can be read on its own.\n",
    "plt.hist(np.log(df_train.SalesPrice), bins=50)\n",
    "plt.title(\"Distribution of log(SalesPrice) (training rows)\")\n",
    "plt.xlabel(\"log(SalesPrice)\")\n",
    "plt.ylabel(\"Number of rows\");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>LotConfig</th>\n",
       "      <th>LandSlope</th>\n",
       "      <th>...</th>\n",
       "      <th>ScreenPorch</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>65.0</td>\n",
       "      <td>8450</td>\n",
       "      <td>Pave</td>\n",
       "      <td>None</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>Inside</td>\n",
       "      <td>Gtl</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>80.0</td>\n",
       "      <td>9600</td>\n",
       "      <td>Pave</td>\n",
       "      <td>None</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>FR2</td>\n",
       "      <td>Gtl</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2007</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>68.0</td>\n",
       "      <td>11250</td>\n",
       "      <td>Pave</td>\n",
       "      <td>None</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>Inside</td>\n",
       "      <td>Gtl</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>70</td>\n",
       "      <td>RL</td>\n",
       "      <td>60.0</td>\n",
       "      <td>9550</td>\n",
       "      <td>Pave</td>\n",
       "      <td>None</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>Corner</td>\n",
       "      <td>Gtl</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2006</td>\n",
       "      <td>WD</td>\n",
       "      <td>Abnorml</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>84.0</td>\n",
       "      <td>14260</td>\n",
       "      <td>Pave</td>\n",
       "      <td>None</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>FR2</td>\n",
       "      <td>Gtl</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 78 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
       "0          60       RL         65.0     8450   Pave  None      Reg   \n",
       "1          20       RL         80.0     9600   Pave  None      Reg   \n",
       "2          60       RL         68.0    11250   Pave  None      IR1   \n",
       "3          70       RL         60.0     9550   Pave  None      IR1   \n",
       "4          60       RL         84.0    14260   Pave  None      IR1   \n",
       "\n",
       "  LandContour LotConfig LandSlope      ...      ScreenPorch PoolArea PoolQC  \\\n",
       "0         Lvl    Inside       Gtl      ...                0        0   None   \n",
       "1         Lvl       FR2       Gtl      ...                0        0   None   \n",
       "2         Lvl    Inside       Gtl      ...                0        0   None   \n",
       "3         Lvl    Corner       Gtl      ...                0        0   None   \n",
       "4         Lvl       FR2       Gtl      ...                0        0   None   \n",
       "\n",
       "  Fence MiscFeature  MiscVal  MoSold  YrSold  SaleType SaleCondition  \n",
       "0  None        None        0       2    2008        WD        Normal  \n",
       "1  None        None        0       5    2007        WD        Normal  \n",
       "2  None        None        0       9    2008        WD        Normal  \n",
       "3  None        None        0       2    2006        WD       Abnorml  \n",
       "4  None        None        0      12    2008        WD        Normal  \n",
       "\n",
       "[5 rows x 78 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = np.log(df.SalesPrice)\n",
    "ids = df.Id\n",
    "X = df.copy()\n",
    "del X[\"Id\"]\n",
    "del X[\"SalesPrice\"]\n",
    "X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>OverallQual</th>\n",
       "      <th>OverallCond</th>\n",
       "      <th>YearBuilt</th>\n",
       "      <th>YearRemodAdd</th>\n",
       "      <th>MasVnrArea</th>\n",
       "      <th>BsmtFinSF1</th>\n",
       "      <th>BsmtFinSF2</th>\n",
       "      <th>...</th>\n",
       "      <th>SaleType_ConLI</th>\n",
       "      <th>SaleType_ConLw</th>\n",
       "      <th>SaleType_New</th>\n",
       "      <th>SaleType_Oth</th>\n",
       "      <th>SaleType_WD</th>\n",
       "      <th>SaleCondition_AdjLand</th>\n",
       "      <th>SaleCondition_Alloca</th>\n",
       "      <th>SaleCondition_Family</th>\n",
       "      <th>SaleCondition_Normal</th>\n",
       "      <th>SaleCondition_Partial</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "      <td>1460.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>56.897260</td>\n",
       "      <td>70.176370</td>\n",
       "      <td>10516.828082</td>\n",
       "      <td>6.099315</td>\n",
       "      <td>5.575342</td>\n",
       "      <td>1971.267808</td>\n",
       "      <td>1984.865753</td>\n",
       "      <td>103.117123</td>\n",
       "      <td>443.639726</td>\n",
       "      <td>46.549315</td>\n",
       "      <td>...</td>\n",
       "      <td>0.003425</td>\n",
       "      <td>0.003425</td>\n",
       "      <td>0.083562</td>\n",
       "      <td>0.002055</td>\n",
       "      <td>0.867808</td>\n",
       "      <td>0.002740</td>\n",
       "      <td>0.008219</td>\n",
       "      <td>0.013699</td>\n",
       "      <td>0.820548</td>\n",
       "      <td>0.085616</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>42.300571</td>\n",
       "      <td>22.433457</td>\n",
       "      <td>9981.264932</td>\n",
       "      <td>1.382997</td>\n",
       "      <td>1.112799</td>\n",
       "      <td>30.202904</td>\n",
       "      <td>20.645407</td>\n",
       "      <td>180.731373</td>\n",
       "      <td>456.098091</td>\n",
       "      <td>161.319273</td>\n",
       "      <td>...</td>\n",
       "      <td>0.058440</td>\n",
       "      <td>0.058440</td>\n",
       "      <td>0.276824</td>\n",
       "      <td>0.045299</td>\n",
       "      <td>0.338815</td>\n",
       "      <td>0.052289</td>\n",
       "      <td>0.090317</td>\n",
       "      <td>0.116277</td>\n",
       "      <td>0.383862</td>\n",
       "      <td>0.279893</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>20.000000</td>\n",
       "      <td>21.000000</td>\n",
       "      <td>1300.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1872.000000</td>\n",
       "      <td>1950.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>20.000000</td>\n",
       "      <td>60.000000</td>\n",
       "      <td>7553.500000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>1954.000000</td>\n",
       "      <td>1967.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>50.000000</td>\n",
       "      <td>70.000000</td>\n",
       "      <td>9478.500000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>1973.000000</td>\n",
       "      <td>1994.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>383.500000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>70.000000</td>\n",
       "      <td>80.000000</td>\n",
       "      <td>11601.500000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>2000.000000</td>\n",
       "      <td>2004.000000</td>\n",
       "      <td>164.250000</td>\n",
       "      <td>712.250000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>190.000000</td>\n",
       "      <td>313.000000</td>\n",
       "      <td>215245.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>2010.000000</td>\n",
       "      <td>2010.000000</td>\n",
       "      <td>1600.000000</td>\n",
       "      <td>5644.000000</td>\n",
       "      <td>1474.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 259 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        MSSubClass  LotFrontage        LotArea  OverallQual  OverallCond  \\\n",
       "count  1460.000000  1460.000000    1460.000000  1460.000000  1460.000000   \n",
       "mean     56.897260    70.176370   10516.828082     6.099315     5.575342   \n",
       "std      42.300571    22.433457    9981.264932     1.382997     1.112799   \n",
       "min      20.000000    21.000000    1300.000000     1.000000     1.000000   \n",
       "25%      20.000000    60.000000    7553.500000     5.000000     5.000000   \n",
       "50%      50.000000    70.000000    9478.500000     6.000000     5.000000   \n",
       "75%      70.000000    80.000000   11601.500000     7.000000     6.000000   \n",
       "max     190.000000   313.000000  215245.000000    10.000000     9.000000   \n",
       "\n",
       "         YearBuilt  YearRemodAdd   MasVnrArea   BsmtFinSF1   BsmtFinSF2  \\\n",
       "count  1460.000000   1460.000000  1460.000000  1460.000000  1460.000000   \n",
       "mean   1971.267808   1984.865753   103.117123   443.639726    46.549315   \n",
       "std      30.202904     20.645407   180.731373   456.098091   161.319273   \n",
       "min    1872.000000   1950.000000     0.000000     0.000000     0.000000   \n",
       "25%    1954.000000   1967.000000     0.000000     0.000000     0.000000   \n",
       "50%    1973.000000   1994.000000     0.000000   383.500000     0.000000   \n",
       "75%    2000.000000   2004.000000   164.250000   712.250000     0.000000   \n",
       "max    2010.000000   2010.000000  1600.000000  5644.000000  1474.000000   \n",
       "\n",
       "               ...            SaleType_ConLI  SaleType_ConLw  SaleType_New  \\\n",
       "count          ...               1460.000000     1460.000000   1460.000000   \n",
       "mean           ...                  0.003425        0.003425      0.083562   \n",
       "std            ...                  0.058440        0.058440      0.276824   \n",
       "min            ...                  0.000000        0.000000      0.000000   \n",
       "25%            ...                  0.000000        0.000000      0.000000   \n",
       "50%            ...                  0.000000        0.000000      0.000000   \n",
       "75%            ...                  0.000000        0.000000      0.000000   \n",
       "max            ...                  1.000000        1.000000      1.000000   \n",
       "\n",
       "       SaleType_Oth  SaleType_WD  SaleCondition_AdjLand  SaleCondition_Alloca  \\\n",
       "count   1460.000000  1460.000000            1460.000000           1460.000000   \n",
       "mean       0.002055     0.867808               0.002740              0.008219   \n",
       "std        0.045299     0.338815               0.052289              0.090317   \n",
       "min        0.000000     0.000000               0.000000              0.000000   \n",
       "25%        0.000000     1.000000               0.000000              0.000000   \n",
       "50%        0.000000     1.000000               0.000000              0.000000   \n",
       "75%        0.000000     1.000000               0.000000              0.000000   \n",
       "max        1.000000     1.000000               1.000000              1.000000   \n",
       "\n",
       "       SaleCondition_Family  SaleCondition_Normal  SaleCondition_Partial  \n",
       "count           1460.000000           1460.000000            1460.000000  \n",
       "mean               0.013699              0.820548               0.085616  \n",
       "std                0.116277              0.383862               0.279893  \n",
       "min                0.000000              0.000000               0.000000  \n",
       "25%                0.000000              1.000000               0.000000  \n",
       "50%                0.000000              1.000000               0.000000  \n",
       "75%                0.000000              1.000000               0.000000  \n",
       "max                1.000000              1.000000               1.000000  \n",
       "\n",
       "[8 rows x 259 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_dummy = pd.get_dummies(X, drop_first= True)\n",
    "X_train = X_dummy[~np.isnan(y)]\n",
    "X_test = X_dummy[np.isnan(y)]\n",
    "y_train = y[~np.isnan(y)]\n",
    "    \n",
    "pd.DataFrame(X_train).describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 20 candidates, totalling 100 fits\n",
      "best params {'fit__alpha': 0.0042813323987193957} best scores 0.0216501437072\n",
      "CPU times: user 3.61 s, sys: 226 ms, total: 3.83 s\n",
      "Wall time: 3.83 s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    3.8s finished\n"
     ]
    }
   ],
   "source": [
    "%%time \n",
    "\n",
    "pipe = pipeline.Pipeline([\n",
    "    (\"poly\", preprocessing.PolynomialFeatures(degree=1, include_bias=False)),\n",
    "    (\"scaler\", preprocessing.StandardScaler()),\n",
    "    (\"fit\", linear_model.Lasso())\n",
    "])\n",
    "param_grid = {\n",
    "    \"fit__alpha\": 10 ** np.linspace(-3, 1, 20)\n",
    "}\n",
    "gs = model_selection.GridSearchCV(cv=5, estimator=pipe, verbose = True,\n",
    "                scoring=\"neg_mean_squared_error\", param_grid=param_grid)\n",
    "\n",
    "gs.fit(X_train, y_train)\n",
    "print(\"best params\", gs.best_params_, \"best scores\", - gs.best_score_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 20 candidates, totalling 100 fits\n",
      "best params {'fit__alpha': 100.0} best scores 0.0223362035465\n",
      "CPU times: user 6.32 s, sys: 570 ms, total: 6.89 s\n",
      "Wall time: 3.42 s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    3.4s finished\n"
     ]
    }
   ],
   "source": [
    "%%time \n",
    "\n",
    "pipe = pipeline.Pipeline([\n",
    "    (\"poly\", preprocessing.PolynomialFeatures(degree=1, include_bias=False)),\n",
    "    (\"scaler\", preprocessing.StandardScaler()),\n",
    "    (\"fit\", linear_model.Ridge(random_state = 1))\n",
    "])\n",
    "param_grid = {\n",
    "    \"fit__alpha\": 10 ** np.linspace(-3, 2, 20)\n",
    "}\n",
    "gs = model_selection.GridSearchCV(cv=5, estimator=pipe, verbose = True,\n",
    "                scoring=\"neg_mean_squared_error\", param_grid=param_grid)\n",
    "\n",
    "gs.fit(X_train, y_train)\n",
    "print(\"best params\", gs.best_params_, \"best scores\", - gs.best_score_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 50 candidates, totalling 250 fits\n",
      "best params {'fit__alpha': 1.0, 'fit__l1_ratio': 0.10000000000000001, 'fit__loss': 'huber'} best scores 0.0256927996113\n",
      "CPU times: user 23.9 s, sys: 1.14 s, total: 25 s\n",
      "Wall time: 25 s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done 250 out of 250 | elapsed:   24.8s finished\n"
     ]
    }
   ],
   "source": [
    "%%time \n",
    "\n",
    "pipe = pipeline.Pipeline([\n",
    "    (\"poly\", preprocessing.PolynomialFeatures(degree=1, include_bias=False)),\n",
    "    (\"scaler\", preprocessing.StandardScaler()),\n",
    "    (\"fit\", linear_model.SGDRegressor(random_state=1, max_iter = 10000, tol=1e-6))\n",
    "])\n",
    "param_grid = {\n",
    "    \"fit__alpha\": 10 ** np.linspace(0, 2, 5),\n",
    "    \"fit__loss\": [\"squared_loss\", \"huber\"],\n",
    "    \"fit__l1_ratio\": np.linspace(0.1, 0.9, 5)\n",
    "}\n",
    "gs = model_selection.GridSearchCV(cv=5, estimator=pipe, verbose = True,\n",
    "                scoring=\"neg_mean_squared_error\", param_grid=param_grid)\n",
    "\n",
    "gs.fit(X_train, y_train)\n",
    "print(\"best params\", gs.best_params_, \"best scores\", - gs.best_score_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 10 candidates, totalling 50 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:   44.4s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best params {'fit__alpha': 0.001, 'fit__learning_rate': 0.1} best scores 0.017065447917\n",
      "CPU times: user 45 s, sys: 331 ms, total: 45.3 s\n",
      "Wall time: 45.4 s\n"
     ]
    }
   ],
   "source": [
    "%%time \n",
    "\n",
    "pipe = pipeline.Pipeline([\n",
    "    (\"poly\", preprocessing.PolynomialFeatures(degree=1, include_bias=False)),\n",
    "    (\"scaler\", preprocessing.StandardScaler()),\n",
    "    (\"fit\", ensemble.GradientBoostingRegressor(random_state=1, learning_rate=0.1, ))\n",
    "])\n",
    "\n",
    "param_grid = {\n",
    "    \"fit__learning_rate\": [0.1, 0.01],\n",
    "    \"fit__alpha\": np.linspace(0.001, 0.999, 5),\n",
    "}\n",
    "gs = model_selection.GridSearchCV(cv=5, estimator=pipe, verbose = True,\n",
    "                scoring=\"neg_mean_squared_error\", param_grid=param_grid)\n",
    "\n",
    "gs.fit(X_train, y_train)\n",
    "print(\"best params\", gs.best_params_, \"best scores\", - gs.best_score_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 5 candidates, totalling 25 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done  25 out of  25 | elapsed:   34.9s finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best params {'fit__reg_alpha': 0.31622776601683794} best scores 0.018332738302\n",
      "CPU times: user 36.4 s, sys: 206 ms, total: 36.6 s\n",
      "Wall time: 36.6 s\n"
     ]
    }
   ],
   "source": [
    "%%time \n",
    "\n",
    "import xgboost as xgb\n",
    "\n",
    "pipe = pipeline.Pipeline([\n",
    "    (\"poly\", preprocessing.PolynomialFeatures(degree=1, include_bias=False)),\n",
    "    (\"scaler\", preprocessing.StandardScaler()),\n",
    "    (\"fit\", xgb.XGBRegressor(max_depth=10, learning_rate=0.1, n_estimators=100, \n",
    "                     objective='reg:linear', booster='gbtree', random_state=1))\n",
    "])\n",
    "\n",
    "param_grid = {\n",
    "    \"fit__reg_alpha\": 10 ** np.linspace(-1, 1, 5),\n",
    "#    \"fit__max_depth\": 2 * np.arange(1, 10),\n",
    "#    \"fit__reg_lambda\": np.linspace(0.1, 0.9, 5)\n",
    "}\n",
    "gs = model_selection.GridSearchCV(cv=5, estimator=pipe, verbose = True,\n",
    "                scoring=\"neg_mean_squared_error\", param_grid=param_grid)\n",
    "\n",
    "gs.fit(X_train, y_train)\n",
    "print(\"best params\", gs.best_params_, \"best scores\", - gs.best_score_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
